In the Receiver Operating Characteristic Curve (Figure 8), Zone 2 has a higher false positive rate around TPR=0.8. I'm curious whether that might be because the "false positives" are actually plausible bubbles, improperly tagged as negative examples.
In all 3 zones, about 1/3 of the false positives with scores above the threshold for TPR=0.8 are plausible detections. Thus, there isn't any evidence that the FPR is systematically overestimated for Zone 2, compared to Zones 1 or 3.
In [9]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import json
from bubbly.extractors import RGBExtractor
from sklearn.metrics import roc_curve
In [31]:
# Load the benchmark scores. A context manager guarantees the file handle is
# closed as soon as the JSON has been parsed.
with open('../models/benchmark_scores.json') as infile:
    labels = json.load(infile)

# Scores for the two example sets (presumably classifier outputs for the
# positive "on" and negative "off" examples -- confirm against the code that
# writes benchmark_scores.json).
on = labels['on_score']
off = labels['off_score']

# Concatenate both sets: yp holds every score, y holds the matching truth
# label (1 for each "on" entry, 0 for each "off" entry), and pars holds the
# per-example parameter rows in the same order.
yp = np.array(on + off)
y = np.array([1] * len(on) + [0] * len(off))
pars = np.array(labels['on'] + labels['off'])

masks = [None] * 3
threshs = [None] * 3  # score threshold where TPR = 0.8
for i in [0, 1, 2]:
    # An example belongs to zone i when the first column of its parameter
    # row is congruent to i modulo 3.
    masks[i] = pars[:, 0] % 3 == i

    # ROC curve restricted to this zone's examples.
    fp, tp, thresh = roc_curve(y[masks[i]], yp[masks[i]])
    plt.plot(fp, tp)

    # First point on the curve whose true positive rate reaches 0.8.
    ind = np.searchsorted(tp, 0.8)

    # Single-argument print(...) produces identical output under Python 2
    # and Python 3.
    print('Zone %i: TPR=%0.2f, FPR=%0.5F, Thresh=%0.2F' % (i, tp[ind], fp[ind], thresh[ind]))
    threshs[i] = thresh[ind]

# Zoom in on the low false-positive-rate end of the curves.
plt.xlim(0, .002)
plt.ylim(0, 1)
Out[31]:
In [42]:
# Number of negative examples (y == 0) that fall in zone index 1.
np.logical_and(masks[1], y == 0).sum()
Out[42]:
In [32]:
# Total number of examples assigned to each of the three zones.
tuple(zone_mask.sum() for zone_mask in masks)
Out[32]:
In [38]:
# Show, zone by zone, the negative examples that score at or above that
# zone's TPR = 0.8 threshold, so they can be inspected by eye.
ex = RGBExtractor()
ex.shp = (200, 200)  # presumably the cutout size in pixels -- confirm in bubbly.extractors

for i in [0, 1, 2]:
    # Negative examples (y == 0) in this zone scoring at or above threshold.
    hit = masks[i] & (yp >= threshs[i]) & (y == 0)

    # Fraction of this zone's negatives that are such false positives.
    # NOTE(review): this value is not displayed or used further in this cell.
    fpr = 1.0 * hit.sum() / (masks[i] & (y == 0)).sum()

    # Two spaces reproduce the output of the original Python 2 statement
    # `print 'Zone ', i`, in a form that also runs under Python 3.
    print('Zone  %i' % i)

    # np.hstack raises ValueError when handed an empty list, so skip zones
    # with no false positives above threshold instead of crashing the cell.
    if not hit.any():
        print('  no false positives at or above threshold')
        continue

    # Extract one image per false positive, scaling the fourth parameter by
    # 1.5, and tile the images side by side.
    im = np.hstack([ex.extract(p[0], p[1], p[2], p[3] * 1.5) for p in pars[hit]])
    plt.imshow(im)
    plt.show()
In [ ]: